/*******************************************************************************
* This program generates earnings sample and earnings related variables
 - gvkey
 - fyearq: fiscal year
 - fqtr: fiscal quarter
 - rdq1: earnings announcement date, adjusted for holidays or after 4pm announcements
 - sue_2: standardized earnings surprise based on seasonal random walk model
 - evol2: earnings volatility
 - ep3: earnings persistence
 - size: market capitalization, $millions
 - bm: book-to-market ratio
 - nrdq: the number of same-day announcements
 - rptlag: reporting lag in days

**************************************************************************************/
 
%let wrds = wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;


rsubmit;
/* empty work library */
proc datasets library=work kill; run;
quit;
/* MAIN BODY OF THE PROGRAM */
options errors=1 noovp; 
options nocenter ps=max ls=78; 
options mprint source nodate symbolgen macrogen; 
options msglevel=i; 
  
libname mine '~'; *define a home directory on WRDS;
%global begindate enddate actu detu comp_list lm_filter
        ibes_vars ibes_where1 ibes_where2 comp_where;  
%let bdate=01jan1970;        * start calendar date of fiscal period end;
%let edate=&sysdate9.;      * end calendar date of fiscal period end;
%let actu=ibes.actu_epsus;   * name of IBES dataset containing unadjusted actuals;
%let detu=ibes.detu_epsus;   * name of IBEs dataset containing unadjusted estimates;
%let begindate=%sysfunc(putn("&bdate"d,5.)); 
%let enddate=%sysfunc(putn("&edate"d,5.)); 
  
/*Variables to extract from Compustat*/
%let comp_list= gvkey fyearq fqtr conm tic cusip cik datadate rdq epsfxq epspxq prccq ajexq spiq 
cshoq cshprq cshfdq rdq saleq atq fyr consol indfmt datafmt popsrc datafqtr; 
  
/*Variables to extract from IBES*/
%let ibes_vars= ticker value fpedats anndats revdats measure fpi estimator 
                analys pdf usfirm; 
  
/*IBES filters;*/
/*%let ibes_where1=*/
/* where=(measure='EPS' and fpi in ('6','7') and &begindate <= fpedats <= &enddate); */
%let ibes_where1=where=(measure='EPS' and fpi in ('6','7') and &begindate<=fpedats<=&enddate);
%let ibes_where2=
 where=(missing(repdats)=0 and missing(anndats)=0 and 0 < intck('day',anndats, 
 repdats) <= 90); 
  
/*timing and primary filters for Compustat Xpressfeed;*/
%let comp_where= where=(fyr>0 and (saleq>0 or atq>0) and consol='C' and popsrc='D' and 
           indfmt='INDL' and datafmt='STD' and missing(datafqtr)=0); 
%let comp_where= where=(fyr>0 and (saleq>0 or atq>0) and consol='C' and popsrc='D' and 
         indfmt='INDL' and datafmt='STD' and missing(datafqtr)=0);
  
/*filter from LM (2006):*/ 
/*- earnings announcement date is reported in Compustat*/  
/*- the price per share is available from Compustat as of the end of the fiscal quarter*/   
/*- price is greater than $1*/  
/*- the market (book) value of equity at the fiscal quarter end is available and is*/  
/*   larger than $5 mil;*/  
/*%let LM_filter=(missing(rdq)=0 and prccq>1 and mcap>5.0); */
%let LM_filter=(missing(rdq)=0);
 
/*define a set of auxiliary macros;*/  
%include '/wrds/ibes/samples/cibeslink.sas'; 
%include '/wrds/ibes/samples/ibes_sample.sas'; 
%include '/wrds/comp/samples/sue.sas'; 
* %include '/wrds/comp/samples/size_bm.sas'; 
/* ********************************************************************************* */
/* ************** W R D S   R E S E A R C H   A P P L I C A T I O N S ************** */
/* ********************************************************************************* */
/* Program   : SIZE_BM.SAS                                                           */
/* Summary   : Assign stocks into 6 Size-BM portfolios                               */
/* Date      : February 2008. Modified Mar 2011                                      */
/* Author    : Denys Glushkov, WRDS                                                  */
/*                                                                                   */
/* Details   : Macro assigns the stocks into six Size-BM portfolios based on the     */
/*             methodology outlined on Ken French webiste at                         */
/*             http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library   */
/*             /six_portfolios.html                                                  */ 
/*                                                                                   */
/* The size breakpoint for year t is the median NYSE market equity at the end of June*/ 
/* of year t. BE/ME for June of year t is the book equity for the last fiscal year   */
/* end in t-1 divided by ME for December of t-1. The BE/ME breakpoints are the 30th  */
/* and 70th NYSE percentiles.                                                        */
/*                                                                                   */
/* Parameters : - BDATE: Sample Start Date                                           */
/*              - EDATE: Sample End Date                                             */
/*              - Link : Dataset containing map b/w IBES Ticker and Compustat GVKEY  */
/*                                                                                   */
/* To run the program, a user should have access to CRSP daily and monthly stock,    */
/* Compustat Annual and Quarterly sets, IBES and CRSP/Compustat Merged database      */
/* ********************************************************************************* */

%MACRO SIZE_BM (bdate=, edate=, link=);
%LOCAL a ;%LET a=%NRSTR(%MEND);   %*--to get SAS-Coloring Back;

/****************************************************************
Step 1. Extract CRSP Data for NYSE and AMEX Common Stocks  
        Merge historical codes with CRSP Monthly Stock File      
        Restriction on Share Code: common shares only            
****************************************************************/
     
%let filtr = (shrcd in (10,11));         
* Selected variables from the CRSP Monthly Stock File;     
%let fvars =  prc ret shrout;                                
*  Selected variables from the CRSP Monthly Event File;
%let evars =  exchcd shrcd dlret;                            
* Modify beginning and ending dates;
%let begdate=intck('year',&begindate,-1); 

/* Invoke CRSPMERGE WRDS Research Macro. Data Output: CRSP_M */
%crspmerge(s=m,start=&bdate,end=&edate,sfvars=&fvars,sevars=&evars,filters=&filtr); 

data msex2;
   set crsp_m;
   by permno date;
   * Create size variable;
   size=abs(prc)*shrout; 
   size_lag=lag(size); *Lag Size for weights;
   ldate = lag(date);
   if first.permno then size_lag = size / (1+ret); 
   * Option for Delisting Returns;
   ret = sum(ret,dlret);
   * Comment previous line not to adjust for delisting events;
   if size > 0;
   drop prc shrout ldate;
run;

/************************************************
Step 2. Assign Stocks to NYSE Size-Based groups 
************************************************/
proc sort data=msex2 (keep=date size exchcd) out=msex3;
   where month(date)=6 and exchcd=1;
   by date;
run;

proc means data=msex3 noprint;
   var size;
   by date;
   output out=nyse (drop=_freq_ _type_) median=/autoname;
run;

proc sql;
   create table size_assign
   as select a.permno, a.date, a.size,
  case when size <= size_median then 'Small' else 'Big'
    end as size_port
   from msex2 (keep=permno date size where = (month(date)=6)) as a
   left join nyse as b
   on a.date= b.date;
quit;

/*************************************************************
2. Create Book Equity(BE) measure 
from Compustat (definition from Daniel and Titman (JF, 2006)
"Market Reactions to Tangible and Intangible Information"
*************************************************************/
data comp_extract;
   set comp.funda 
   (where=(fyr > 0 and at > 0 and consol='C' and 
           indfmt='INDL' and datafmt='STD' and popsrc='D'));
   if missing(SEQ)=0 then she=SEQ;else
   if missing(CEQ)=0 and missing(PSTK)=0 then she=CEQ+PSTK;else
   if missing(AT)=0 and missing(LT)=0 and missing(MIB)=0 then she=AT-(LT+MIB);
   else she=.;
   if missing(PSTKRV)=0 then BE0=she-PSTKRV;else 
   if missing(PSTKL)=0 then BE0=she-PSTKL; else 
   if missing(PSTK)=0 then BE0=she-PSTK; else BE0=.;
   * Converts fiscal year into calendar year data;
   if (1 <= fyr <= 5) then date_fyend=intnx('month',mdy(fyr,1,fyear+1),0,'end');
   else if (6 <= fyr <= 12) then date_fyend=intnx('month',mdy(fyr,1,fyear),0,'end');
   calyear=year(date_fyend);
   format date_fyend date9.;
 * Accounting data since calendar year 't-1';
   if (year(date_fyend) >= year(&begindate) - 1) 
      and (year(date_fyend) <= year(&enddate) + 1);
   keep gvkey calyear fyr BE0 date_fyend indfmt consol datafmt popsrc datadate TXDITC;
run;

proc sql; 
  create table comp_extract
   as   select a.gvkey, a.calyear, a.fyr, a.date_fyend, 
        case when missing(TXDITC)=0 and missing(PRBA)=0 then BE0+TXDITC-PRBA else BE0
  end as BE
   from comp_extract a left join 
        comp.aco_pnfnda (keep=gvkey indfmt consol datafmt popsrc datadate prba) b
   on   a.gvkey=b.gvkey and a.indfmt=b.indfmt and 
        a.consol=b.consol and a.datafmt=b.datafmt and 
        a.popsrc=b.popsrc and a.datadate=b.datadate;
quit;

/******************************************************
Step 3. Create Book to Market (BM) ratios at December 
******************************************************/
proc sql;
   create table BM0 (where=(BM>0))
   as select a.gvkey, a.calyear, c.permno, c.exchcd, c.date, 
   a.be/(abs(c.prc)*c.shrout/1000) as BM
   from comp_extract as a, 
    &link as b,   
    crsp_m (where=( month(date)=12)) as c
  where a.gvkey=b.gvkey 
   and ((b.linkdt <= c.date <= b.linkenddt) 
   or  (b.linkdt<=c.date and b.linkenddt=.E) 
   or  (c.date <= b.linkenddt and b.linkdt=.B)) 
   and b.lpermno=c.permno
   and a.calyear = year(c.date) and (abs(c.prc)*c.shrout)>0
   and b.linktype in ("LC" "LN" "LU") 
   and b.linkprim IN ("C", "P");
   * update using new crsp.ccmxpf_lnkhist;
quit;
   
   


/********************************************************
4. Keep only those cases with valid stock market in June 
********************************************************/
proc sql;
   create table BM
   as select a.gvkey, a.permno, a.bm, a.calyear, a.date as decdate, 
       a.exchcd, b.date, b.size, b.size_port
   from BM0 as a, size_assign as b
   where a.permno=b.permno
   and intck('month',a.date,b.date)=6 and b.size>0;
quit;

/***************************************************
5. Assign stocks to NYSE BM-based groups 
***************************************************/
proc sort data=BM out=nyse1 (keep=permno bm calyear decdate);
   where exchcd=1;
   by decdate;
run;

proc univariate data=nyse1 noprint;
   var bm;
   by decdate;
   output out=nyse2 pctlpts = 30 70 pctlpre=per;
run;

*Merge back with master file that contains all securities 
from NYSE, Nasdaq and AMEX;
proc sql;
   create table bm1
   as select a.permno, a.gvkey, a.bm, a.size, a.size_port, a.date, a.decdate,
   case when bm <= per30 then 'Low'
      when per30 < bm <= per70 then 'Medium'
    else 'High' 
    end as bm_port
   from BM as a, nyse2 as b
   where a.decdate=b.decdate;
/* The 'date' variable refers to June, whereas                */
/* 'decdate' variable refers to December of the previous year */
quit;

proc sort data=bm1; by permno descending date;run;

data size_bm_port; set bm1;
by permno descending date;
leaddate=lag(date);
if first.permno then leaddate=intnx('month',date,-12,'end');
format date leaddate decdate date9.;
rename date=size_date decdate=bm_date;
label date='Valid date for firm size';
label decdate='Valid date for Book-to-Market';
run;

proc sort data=size_bm_port; by permno size_date;run;

proc sql; drop table nyse1, nyse2, nyse, size_assign, 
    msex2, msex3, bm, bm0, bm1, comp_extract;
quit;
%MEND;
%SIZE_BM (bdate=&bdate, edate=&edate, link= crsp.ccmxpf_linktable ); 
/* ********************************************************************************* */
/* *************  Material Copyright Wharton Research Data Services  *************** */
/* ****************************** All Rights Reserved ****************************** */
/* ********************************************************************************* */
*build CRSP-IBES permno-ticker link;
%include '/wrds/ibes/samples/iclink.sas'; 
  
/*CIBESLINK macro will create a linking table CIBESLNK between IBES ticker and
Compustat GVKEY based on IBES ticker-CRSP permno (ICLINK) and CCM link; */
%CIBESLINK (begdt=&begindate, enddt=&enddate); 
  
/*Read in IBES tickers from the specified file stored 
in the user's home directory on WRDS*/

/*proc upload data = tickers out=tickers;run;*/


/*******************************************************************************************/
/*   Program       : IBES_Sample.sas                                                       */
/*   Author        : Denys Glushkov, WRDS                                                  */
/*   Date Created  : Feb 2008                                                              */  
/*   Last Modified : June 2009                                                             */
/*                                                                                         */
/*   Description: Extract estimates and link to actuals                                    */
/*                                                                                         */
/* Macro IBES_SAMPLE extracts the estimates from the IBES Unadjusted file based on         */
/* the user-provided input, links them to actuals, puts estimates and actuals on the       */
/* same basis by adjusting for stock splits using CRSP adjustment factor and calculates    */
/* the median of analyst forecasts made in the 90 days prior to the earnings announcement  */
/* date.                                                                                   */
/*******************************************************************************************/
 
libname mine '~'; /* Home directory that stores iclink table */
 
%let detu = detu_epsus; /* Unadjusted Detail history with EPS measure in the US region*/
%let actu = actu_epsus; /* Unadjusted Actual with EPS measure in the US region*/
 
 
%MACRO IBES_SAMPLE (infile=, ibes1_where=, ibes2_where=, ibes_var=);
 
proc sql; create table ibes (drop=measure fpi)
        as select *
        from ibes.&detu (&ibes1_where keep=&ibes_var) as a,             /* ibes1_where and
ibes_var are specified*/
             &infile as b                                              /* prior to invoking IBES_SAMPLE*/
        where a.ticker=b.ticker
        order by a.ticker, fpedats, estimator, analys, anndats, revdats;
quit;
 
/*Select the last estimate for a firm within broker-analyst group*/    
data ibes; set ibes;
        by ticker fpedats estimator analys;
        if last.analys;
run;
 
/*How many estimates are reported on primary/diluted basis?*/
proc sql;
        create table ibes
                as select a.*, sum(pdf='P') as p_count, sum(pdf='D') as d_count
                from ibes as a
                group by ticker, fpedats;
 
/* a. Link unadjusted estimates with unadjusted actuals and CRSP permnos                                */
/* b. Adjust report and estimate dates to be CRSP trading days                                          */
        create table ibes1 (&ibes2_where)
                as select a.*, b.anndats as repdats, b.value as act, b.ANNTIMS, c.permno,
                case when weekday(a.anndats)=1 then intnx('day',a.anndats,-2)                  /*if sunday move back by 2 days;*/
                     when weekday(a.anndats)=7 then intnx('day',a.anndats,-1) else a.anndats   /*if saturday move back by 1 day*/
                end as estdats1,
                case when weekday(b.anndats)=1 then intnx('day',b.anndats,1)                  /*if sunday move forward by 1 day  */
                     when weekday(b.anndats)=7 then intnx('day',b.anndats,2) else b.anndats   /*if saturday move forward by 2 days*/
  
              end as repdats1
                from ibes as a, ibes.&actu as b, mine.iclink as c
                where a.ticker=b.ticker and a.fpedats=b.pends and a.usfirm=b.usfirm and b.pdicity='QTR'
                          and b.measure='EPS' and a.ticker=c.ticker and c.score in (0,1,2);
 
/*   Making sure that estimates and actuals are on the same basis
                        */
/*   1. retrieve CRSP cumulative adjustment factor for IBES report and estimate dates                                           */
        create table adjfactor
                as select distinct a.*
                from crsp.dsf (keep=permno date cfacshr) as a, ibes1 as b
                where a.permno=b.permno and (a.date=b.estdats1 or a.date=b.repdats1);
         
/*      2.if adjustment factors are not the same, adjust the estimate to be on the same basis with the actual   */
        create table ibes1
                as select distinct a.*, b.est_factor, c.rep_factor,
                        case when (b.est_factor ne c.rep_factor) and missing(b.est_factor)=0 and missing(c.rep_factor)=0
                         then (rep_factor/est_factor)*value else value end as new_value
                from ibes1 as a,
                        adjfactor (rename=(cfacshr=est_factor)) as b,
                        adjfactor (rename=(cfacshr=rep_factor)) as c
                        where (a.permno=b.permno and a.estdats1=b.date) and
                                  (a.permno=c.permno and a.repdats1=c.date);
quit;
 
/* Make sure the last observation per analyst is included
                        */
proc sort data=ibes1;
        by ticker fpedats estimator analys anndats revdats;
run;
 
data ibes1; set ibes1;
by ticker fpedats estimator analys;
if last.analys;
run;
 
/* Compute the median forecast based on estimates in the 90 days prior to the report date                            */
proc means data=ibes1 noprint;
        by ticker fpedats;
        var /*value*/ new_value;                         /* new_value is the estimate appropriately adjusted         */
        output out= medest (drop=_type_ _freq_)         /* to be on the same basis with the actual reported earnings */
        median=medest n=numest;
run;
 
/* Merge median estimates with ancillary information on permno, actuals and report dates                              */
/* Determine whether most analysts are reporting estimates on primary or diluted basis                                */
/* following the methodology outlined in Livnat and Mendenhall (2006)                                                 */
proc sql; create table medest
        as select distinct a.*, b.repdats, b.act, b.permno, b.ANNTIMS, b.rep_factor
               label = 'Cumulative Adjument Factor for IBES Earnings',
        case when p_count>d_count then 'P'
             when p_count<=d_count then 'D'
        end as basis                                                                            
        from medest as a left join ibes1 as b
        on a.ticker=b.ticker and a.fpedats=b.fpedats;
quit;
 
proc sql;
        drop table ibes, ibes1;
quit;
%MEND;

/* Macro IBES_SAMPLE extracts the estimates from IBES Unadjusted file based on the   */ 
/* user-provided input (SAS set tickers), links them to IBES actuals, puts estimates */ 
/* and actuals on the same basis by adjusting for stock splits using CRSP adjustment */ 
/* factor and calculates the median of analyst forecasts made in the 90 days prior to*/
/* the earnings announcement date. Outputs file MEDEST into work directory           */ 
%IBES_SAMPLE (infile=ibes.id, ibes1_where=&ibes_where1, ibes2_where=&ibes_where2,  
ibes_var=&ibes_vars); 
  
/* Extracting Compustat Data                                                         */
/* Use CIBESLNK table as well as header GVKEY-IBES Ticker map in Compustat Security  */
/* table to link IBES Ticker and GVKEY                                               */


proc sql; 
   create table gvkeys 
   as select a.* 
   from cibeslnk as a, (select distinct ticker from ibes.id) as b 
   where a.ticker=b.ticker;
  
   create table comp (drop=consol indfmt datafmt popsrc) 
   as select a.*, cshoq*prccq as mcap 
   from comp.fundq (keep=&comp_list &comp_where) as a, 
/*   from comp.fundq (keep=&comp_list) as a, */
   gvkeys as b 
   where a.gvkey=b.gvkey; 
  
   create table comp 
   as select * 
   from comp a left join 
   (select distinct gvkey,ibtic from comp.security 
   (where=(missing(ibtic)=0))) b 
   on a.gvkey=b.gvkey; 
quit; 
  
/* Create calendar date of fiscal period end in Compustat extract*/ 
data comp; set comp; 
   if (1<=fyr<=5) then date_fyend=intnx('month',mdy(fyr,1,fyearq+1),0,'end'); 
   else if (6<=fyr<=12) then date_fyend=intnx('month',mdy(fyr,1,fyearq),0,'end'); 
   fqenddt=intnx('month',date_fyend,-3*(4-fqtr),'end'); 
   format fqenddt date9.; 
   drop date_fyend; 
run; 
  
/* a) Link Gvkey with CRSP Permno*/

/*%iclink (ibesid=ibes.id, crspid=crspa.stocknames, outset=work.lnk); */
* Linking between gvkey and permno are updated using the new link table ccmxpf_linkhis;

proc sql; 
/*   create table comp1 */
/*   as select a.*, b.lpermno */
/*   from comp (where=(&begindate<=fqenddt<=&enddate)) as a left join crsp.ccmxpf_linktable*/
/*  as b */
/*   on a.gvkey=b.gvkey and ((b.linkdt<=a.fqenddt <=b.linkenddt) or */
/*   (b.linkdt<=a.fqenddt and b.linkenddt=.E) or */
/*   (b.linkdt=.B and a.fqenddt <=b.linkenddt)) and b.usedflag=1*/
/*  and  b.linktype in ("LC" "LN" "LU" "LX" "LD" "LS") */
/*    and b.linkprim IN ("C", "P")  ; */

   create table comp1 
   as select a.*, b.lpermno 
   from comp (where=(&begindate<=fqenddt<=&enddate)) as a left join crsp.ccmxpf_lnkhist
  as b 
   on a.gvkey=b.gvkey and ((b.linkdt<=a.fqenddt <=b.linkenddt) or 
   (b.linkdt<=a.fqenddt and b.linkenddt=.E) or 
   (b.linkdt=.B and a.fqenddt <=b.linkenddt))
  and  b.linktype in ("LC" "LN" "LU") 
    and b.linkprim IN ("C", "P")  ; 
  
/* b) Link Gvkey with IBES Ticker*/
create table comp1 
   as select a.*, b.ticker 
   from comp1 as a left join cibeslnk as b 
   on a.gvkey=b.gvkey and ((b.fdate<=a.fqenddt <=b.ldate) or 
   (b.fdate<=a.fqenddt and b.ldate=.E) or (b.fdate=.B and a.fqenddt <=b.ldate)); 
  
/* c) Link IBES analysts' expectations (MEDEST), IBES report dates (repdats) */
/* and actuals (act) with Compustat data; MEDEST table is a result of        */
/* running IBES_SAMPLE macro                                                 */
create table comp1 
   as select a.*, b.medest, b.numest,b.repdats, b.act, b.basis, b.ANNTIMS, b.rep_factor
   from comp1 as a left join medest as b 
   on a.ticker=b.ticker and 
   put(a.fqenddt,yymmn6.)=put(b.fpedats,yymmn6.); 
quit; 
  
/*remove fully duplicate records and pre-sort*/
proc sort data=comp1; by gvkey fyearq fqtr;run; 
/*proc download data = comp1 out = work.comp1; */
/*run;*/

/*proc sort data=comp1 nodupkey out=lala; by gvkey fyearq fqtr;run; */
/*proc download data = medest out = work.medest; run;*/
/* match -5 trading day prc */
%let t = 5;
* step 1, create distinct trading days from crsp.dsi;
proc sql;
  create table tdays
  as select distinct date from crsp.dsi
  order by date;
  quit;
data tdays;
  set tdays;
  number = _n_;
  run;
* step 2, match the corresponding rank number to rdq;
/*data tdays; set rep.tdays; run;*/
* If rdq and repdats differ, use the earliest one following 
  DellaVigna and Pollet (2009), who show that earlier date is 
  usually the actual date of announcement while the later date 
  is that of publication in the Wall Street Journal;
data comp1;
  set comp1;
  rdq_min =  min(rdq, repdats);
  format rdq_min date9.;
  run;
proc sql;
  create view eads as select distinct rdq_min from comp1;

  create table eads1
  as select a.*, b.date as rdq_t format=date9., b.number
  from eads a left join tdays as b
  on b.date - a.rdq_min >= 0
  group by rdq_min
  having b.date - a.rdq_min = min(b.date - a.rdq_min);

  create table temp2
  as select a.*, b.rdq_t, b.number
  from comp1 a left join eads1 as b
  on a.rdq_min = b.rdq_min
  order by a.gvkey, a.fyearq desc, a.fqtr desc;
  quit;
* Step 3, Adjust for annoucenment time if ANNTIMS ne . and repdats = rdq_min and repdats is also a trading day;
* For all these cases, move rdq_t to the next trading day using number variable;
* Patton and Verado (2012) show that 33% of announcements with valid IBES timestaps occur after 4pm;
proc sql;
  create table af4pm 
  as select distinct rdq_min, gvkey, number from temp2
  where hour(ANNTIMS)>=16 and repdats = rdq_min and rdq_t = rdq_min;

  create table af4pm1
  as select a.*, b.date as rdq_t_plus format=date9.
  from af4pm as a left join tdays as b
  on b.number - a.number =1;

  create table temp3
  as select a.*, max(a.rdq_t, b.rdq_t_plus) as rdq1 format = date9.
  from temp2 as a left join af4pm1 as b
  on a.gvkey = b.gvkey and a.rdq_min =  b.rdq_min;

  quit;

* Step 4, Match crsp.dsf to it;
* first match cfacshr to fqdenddt to be used later;
proc sql;
  create table temp4
  as select a.*, b.cfacshr as qtr_factor
  from temp3 as a left join crsp.dsf(keep=permno date cfacshr) as b
  on a.lpermno =  b.permno and a.fqenddt = b.date;

* match prc and cfashr to -t days relative to rdq1;
  create table temp5
  as select a.*, abs(b.prc) as prc_&t, b.cfacshr as minus&t._factor label = 'cfacshr matched to -t trading days'
  from temp4 as a left join 
  (select a.*, b.number from crsp.dsf(keep = permno date cfacshr prc) as a, tdays as b
  where a.date = b.date) as b
  on a.lpermno = b.permno and a.number - b.number = &t;
  quit;

proc sort data=temp5 out = temp6 nodupkey; by gvkey fyearq fqtr;run;
proc sql;
  drop table comp_final1, comp_final2, comp_final3, comp_final_1, comp_final_1, comp_final_1;
  quit;
/*%let t=10;*/
/*rsubmit;*/
/*proc sql;*/
/*  create table input1*/
/*  as select a.*, abs(b.prc) as prc*/
/*  from comp1 as a left join */
/*     crsp.dsf (keep=permno prc date) as b*/
/*    on a.lpermno=b.permno and intnx('day',rdq,-&t)<=b.date<=rdq*/
/*  group by a.lpermno, rdq*/
/*  having b.date-a.rdq=min(b.date-a.rdq);*/
/*  quit;*/


/***********************************************************************************************/
/* Program      : SUE.sas                                                                      */
/* Author       : Denys Glushkov, WRDS                                                         */
/* Date Created : Feb 2008                                                                     */  
/* Last Modified: Jun 2009                                                       */
/*                                                                                             */
/* Description: 3 Methods of calculating standardized earnings surprises                       */
/*                                                                                             */
/* Macro SUE calculates standardized earnings surprises using 3 (three) methods considered     */
/* by LM (2006). Method 1 assumes a rolling seasonal random walk model. Method 2 excludes      */
/* "special items" from the Compustat Data. In these two methods, if most analyst forecasts of */
/* EPS are based on diluted (primary) EPS, Macro uses Compustat's diluted (basic) figures      */
/* Method 3 is based solely on IBES median estimates/actuals and does not use Compustat data   */
/***********************************************************************************************/
 
%MACRO SUE (method=, input=, lag = );
/* Process Compustat Data on a seasonal year-quarter basis*/

%local i;
%do i=1 %to 4;
proc sort data=&input (where=(fqtr=&i)) out=qtr;
    by gvkey fyearq fqtr;
run;
data qtr; set qtr;
    by gvkey fyearq;
    %if &method=1 %then
     %do;
        lageps_p=lag(epspxq);lageps_d=lag(epsfxq);lagadj=lag(ajexq);
        if first.gvkey then do; lageps_p=.;lageps_d=.;lagadj=.;end;
        select (basis);
        when ('P') do; actual=epspxq/ajexq; expected=lageps_p/lagadj;end;
        when ('D') do; actual=epsfxq/ajexq; expected=lageps_d/lagadj;end;
        otherwise do; actual=epspxq/ajexq; expected=lageps_p/lagadj;end;
        end;
        drop lageps_p lageps_d lagadj;
        deflator=prccq/ajexq;
    deflator&lag=prc_&lag/ajexq;
    %end;%else;
    %if &method=2 %then
     %do;
        lageps_p=lag(epspxq);lagshr_p=lag(cshprq);lagadj=lag(ajexq);
        lageps_d=lag(epsfxq);lagshr_d=lag(cshfdq);lagspiq=lag(spiq);
        if first.gvkey then do; lageps_p=.;lageps_d=.;lagshr_p=.;
                                lagshr_d=.;lagadj=.;lagspiq=.;end;
        select (basis);
        when ('P') do; actual=sum(epspxq,-0.65*spiq/cshprq)/ajexq; expected=sum(lageps_p,-0.65*lagspiq/lagshr_p)/lagadj;end;
        when ('D') do; actual=sum(epsfxq,-0.65*spiq/cshfdq)/ajexq; expected=sum(lageps_d,-0.65*lagspiq/lagshr_d)/lagadj;end;
        otherwise do; actual=sum(epspxq,-0.65*spiq/cshprq)/ajexq; expected=sum(lageps_p,-0.65*lagspiq/lagshr_p)/lagadj;end;
        end;
        drop lageps_p lagshr_p lagadj lageps_d lagshr_d lagspiq;
        deflator=prccq/ajexq;
        deflator&lag=prc_&lag/ajexq;
        %end;%else;
    %if &method=3 %then
     %do;
        actual=act;
    lag1yr_actual = lag(act); lag1yr_rep_factor = lag(rep_factor);
    if first.gvkey then do; lag1yr_actual = .; lag1yr_rep_factor = .; end;
        expected=medest;
    * since both actual and expected here are split-adjusted to report/announecement date;
        deflator=prccq*(rep_factor/ajexq); * qtr_end is cfacshr matched to fqenddt and through verification its identical to ajexq;
    deflator&lag=prc_&lag*(rep_factor/minus&lag._factor);
     %end;
    sue&method=(actual-expected)/deflator;
  sue&method._&lag=(actual-expected)/deflator&lag;
    format sue&method percent7.4;

run;
 
proc append base=comp_final&method data=qtr;run;
proc sql; drop table qtr;quit;
%end;
 
proc sort data=comp_final&method; by gvkey fyearq fqtr;run;
%MEND;



%MACRO Allsurprises(tlag=); 

%do k=1 %to 3; 
%SUE (method=&k, input=temp6, lag = &tlag); 
%if &k=1 %then
%do;
proc sql;
   create table comp_final_&k
   as select distinct a.*,
   (a.actual-a.expected)/std(b.actual-b.expected) as sue_&k, count(b.actual) as count&k
   label = 'Number of Quarters included from previous 2 years'
     from comp_final&k as a left join comp_final&k as b
     on a.gvkey=b.gvkey and  0<=4*(a.fyearq-b.fyearq)+ (a.fqtr-b.fqtr)<=7
   group by a.gvkey, a.fyearq, a.fqtr;
   quit;
   proc sort data=comp_final_&k; by gvkey fyearq fqtr;run;
%end;%else;

%if &k =2 %then
%do;
/* Random Walk Earnings (scaled by std), Earnings Volatility */
proc sql;
    create table comp_final_&k
    as select distinct a.*,
    (a.actual-a.expected)/std(b.actual-b.expected) as sue_&k, 
    count(b.actual) as count&k
    label = 'Number of Quarters included from previous 2 years',
    /* Earnings volatility using sue_2, past 7 quarters*/
    std(b.actual-b.expected) as evol2
    label = 'Earnings Volatility using sue_2'
    from comp_final&k as a left join comp_final&k as b
    on a.gvkey=b.gvkey and  0<=4*(a.fyearq-b.fyearq)+ (a.fqtr-b.fqtr)<=7
    group by a.gvkey, a.fyearq, a.fqtr;
    quit;
proc sort data=comp_final_&k; by gvkey fyearq fqtr;run;
/* Earnings Persistence */
* step 1, tile up the observations to current quarter;
proc sql;
    create table ep2 
    as select distinct a.gvkey, a.fyearq, a.fqtr, b.actual,
  b.fyearq as evtyr, b.fqtr as evtqtr,
  count(b.actual) as count
    from comp_final&k as a left join comp_final&k as b
    on a.gvkey=b.gvkey and  0<=4*(a.fyearq-b.fyearq)+ (a.fqtr-b.fqtr)<=7
    group by a.gvkey, a.fyearq, a.fqtr;
  quit;
proc sql;
    create table ep2 
    as select distinct a.*, b.actual as lag_actual
    from ep2 as a left join comp_final&k as b
    on a.gvkey=b.gvkey and  4*(a.evtyr-b.fyearq)+(a.evtqtr-b.fqtr)=1
    order by a.gvkey, a.fyearq, a.fqtr;
    quit;
* step 2, run regression by gvkey fyearq and fqtr;
ods listing close;
ods graphics off;            /* or use the %ODSOff macro */
ods exclude all;             /* suspend all open destinations */
options nonotes NOSERROR;
proc reg data=ep2 outest=est2 edf;
    by gvkey fyearq fqtr;
    where count>=4;
    model actual = lag_actual;
    run;
ods listing;
ods exclude none; 
proc printto; run;
options notes SERROR;

* step 3, merge the persistence coefficient back to comp_final_&k;
proc sql;
  create table comp_final_&k
  as select a.*, b.lag_actual as ep2 label = 'Earnings Persisetnce SUE_2'
  from comp_final_&k as a left join est2(where=(_edf_>=2)) as b
  on a.gvkey =b.gvkey and a.fyearq = b.fyearq and a.fqtr = b.fqtr
  order by gvkey, fyearq, fqtr;
  quit;

%end;%else;
%if &k = 3 %then
%do;
proc sql;
    create table comp_final_&k
    as select distinct a.*,
    (a.actual-a.expected)/std(b.actual-b.expected) as sue_&k, 
    count(b.actual) as count&k
    label = 'Number of Quarters included from previous 2 years',
    /* Earnings volatility using sue3, past 7 quarters*/
    std((b.actual/b.rep_factor)-(b.lag1yr_actual/b.lag1yr_rep_factor)) as evol3
    label = 'Earnings Volatility using sue3'
    from comp_final&k as a left join comp_final&k as b
    on a.gvkey=b.gvkey and  0<=4*(a.fyearq-b.fyearq)+ (a.fqtr-b.fqtr)<=7
    group by a.gvkey, a.fyearq, a.fqtr;
    quit;

/* Earnings Persistence */
* step 1, tile up the observations to current quarter;
proc sql;
    create table ep3
    as select distinct a.gvkey, a.fyearq, a.fqtr, b.actual,
  b.fyearq as evtyr, b.fqtr as evtqtr,
  count(b.actual) as count
    from comp_final&k as a left join comp_final&k as b
    on a.gvkey=b.gvkey and  0<=4*(a.fyearq-b.fyearq)+ (a.fqtr-b.fqtr)<=7
    group by a.gvkey, a.fyearq, a.fqtr;
  quit;
proc sql;
    create table ep3
    as select distinct a.*, b.actual as lag_actual
    from ep3 as a left join comp_final&k as b
    on a.gvkey=b.gvkey and  4*(a.evtyr-b.fyearq)+(a.evtqtr-b.fqtr)=1
    order by a.gvkey, a.fyearq, a.fqtr;
    quit;
* step 2, run regression by gvkey fyearq and fqtr;
ods listing close;
ods graphics off;            /* or use the %ODSOff macro */
ods exclude all;             /* suspend all open destinations */
options nonotes NOSERROR;
proc reg data=ep3 outest=est3 edf;
    by gvkey fyearq fqtr;
    where count>=4;
    model actual = lag_actual;
    run;
ods listing;
ods exclude none; 
proc printto; run;
options notes SERROR;

* step 3, merge the persistence coefficient back to comp_final_&k;
proc sql;
  create table comp_final_&k
  as select a.*, b.lag_actual as ep3 label = 'Earnings Persisetnce SUE3'
  from comp_final_&k as a left join est3(where=(_edf_>=2)) as b
  on a.gvkey =b.gvkey and a.fyearq = b.fyearq and a.fqtr = b.fqtr
  order by gvkey, fyearq, fqtr;
  quit;
proc sort data=comp_final_&k; by gvkey fyearq fqtr;run;
%end;


%end;
%mend; 
%Allsurprises(tlag=&t); 
/* Merge all of the results together to get a dataset containing SUE1 , SUE2*/
/* and SUE3 for all relevant (GVKEY-Report date) pairs                      */   
data comp_final_check; 
  merge comp_final2 
     comp_final1 (keep=gvkey fyearq fqtr sue1 sue1_&t) 
       comp_final3 (keep=gvkey fyearq fqtr sue3 sue3_&t actual expected deflator deflator&t
            rename = (actual = actual3 expected =expected3 deflator = deflator3 deflator&t = deflator3_&t) )
       comp_final_1 (keep=gvkey fyearq fqtr sue_1 count1) 
       comp_final_2 (keep=gvkey fyearq fqtr sue_2 count2 evol2 ep2) 
       comp_final_3 (keep=gvkey fyearq fqtr sue_3 count3 evol3 ep3);
  by gvkey fyearq fqtr; 
/*  format rdq date9.;*/
/*  label fqenddt='Calendar date of fiscal period end'; */
/*  keep ticker ibtic lpermno gvkey conm fyearq fqtr fyr fqenddt ajexq; */
/*  keep repdats rdq sue1 sue2 sue3 basis actual expected; */
/*  keep deflator act medest numest prccq mcap prc; */
/*  keep sue_1 sue_2 sue_3 count1 count2 count3;*/
/*  keep evol2 ep2 evol3 ep3;*/
/*  keep sue1_&t sue2_&t sue3_&t;*/
  run;

  * reporting lag;
  data comp_final_check;
    set comp_final_check; 
    rptlag = intck('day', fqenddt, rdq); 
  run;

  * nrdq: number of same-day announcements;
  proc sql;
    create table nrdq
    as select distinct a.rdq, count(b.rdq) as nrdq
    from (select distinct rdq from comp_final_check) as a, comp_final_check as b
    where a.rdq = b.rdq
    group by a.rdq;
  
    create table comp_final_check
    as select a.*, b.nrdq  
    from comp_final_check as a, nrdq as b
    where a.rdq = b.rdq;
  quit;


  * Add size and BM;
  proc sql; 
     create table comp_final_check 
     as select a.gvkey, a.lpermno as permno, a.fyearq, a.fqtr, a.rdq1, a.sue_2, 
       a.evol2, a.ep2, a.rptlag, a.nrdq, b.size, b.bm
     from comp_final_check as a left join size_bm_port as b 
     on a.permno=b.permno and b.size_date <= a.rdq1 < b.leaddate; 
  quit; 



proc download data =comp_final_check  out = rep.earnings;run;
endrsubmit;
